## "Where are the missing dead" replication
##
## Reproduce: Workplace Injury Insurance claimants
#
##            - Figure 5
##            
## 
## Input:     main.dta


## Guoer Liu
## 7/31/2022 


setwd("~/Dropbox/Missing dead/replication/")


library(haven)
library(foreign)
library(estimatr)
library(scales)
library(modelsummary) ## modelsummary(): for lm_robust object
library(extrafont) ## change fonts in plot


### Function 

dataCollapse <- function(vector, year.v, treat.ind){
    # Args:
    #   vector:     n x 1 vector to be collapsed by province-treat
    #   year.v:     n x 1 vector
    #   treat.ind:  n x 1 vector of treatment indicator
    out <- tapply(vector, 
                  list(year.v, treat.ind), 
                  function(x) mean(x, na.rm = T))
    return(out)
}


paraPlot <- function(collapMat, 
                    matrix.x.lab = NULL, matrix.title = NULL,
                    l.mar = 3.5,
                    hline = NULL,
                    x.axis.vec = NULL,
                    y.lab = TRUE, y.lab.names = NULL,
                    y.min = NULL, y.max = NULL,
                    legend = TRUE, leg.pos = NULL){
    # Args:
    #   collapMat:      n x 2 matrix (column 1: control; column 2: treat)
    #   matrix.x.lab:   a string of figure x axis label
    #   matrix.title:   a string of figure title
    #   hline:          a scalar indicate the position of vertical line
    #   x.axis.vec:     a length n string vector that shows x axis
    #   y.lab:          logical parameter to turn off y label
    #   y.lab.names:    a string of y axis lable
    #   y.min:          a scalar indicates the minimum value on y axis
    #   y.max:          a scalar indicates the maximum value on y axis
    #   legend:         logical parameter to turn off legend
    #   leg.pos:        a string indicates legend position
    pch.cex <- 1.6
    ctl.v <- collapMat[, 1]
    tr.v <- collapMat[, 2]
    #y.max <- max(x)
    #y.min <- min(x)
    x.len <- nrow(collapMat)

    par(mar= c(4, l.mar, 1, 1))
    plot(NA, xlim = c(0.5, x.len), 
             ylim = c(y.min, y.max), 
             type = "n", 
             xlab = matrix.x.lab,
             main = matrix.title, 
             ylab = "", axes = F, cex.lab = 1.3)
    grid(nx = NULL, ny = NULL,
         lty = 2,      # Grid line type
         col = "gray", # Grid line color
         lwd = 0.6)
    abline(v = hline, col = alpha("orangered", 0.8), lwd = 1.5)
    lines(ctl.v, lty = 2, lwd = 1.5)
    points(ctl.v, cex = pch.cex, pch = 1, col = "black")
    lines(tr.v, lty = 1, lwd = 1.5)
    points(tr.v, cex = pch.cex, pch = 19, col = "black", bg = "black")
    axis(1, at = seq(1, x.len, 1), label = x.axis.vec, 
         cex.axis = 1, las = 1)
    if (y.lab == TRUE){
        axis(2, at = seq(ceiling(y.min), floor(y.max), 1), 
            label = seq(ceiling(y.min), floor(y.max), 1),
            cex.axis = 1, las = 1)
        title(ylab = y.lab.names, line = 2, cex.lab = 1)
    }
    if (legend == TRUE){
        legend(leg.pos, title = NULL, 
               legend = c("Treated", "Control"), 
               col = "black", 
               lty = c(1, 2), cex = 0.8,
               pch = c(19, 1),
               lwd = c(1, 1))
    }
}


mydata <- read_dta("main.dta", encoding = "UTF-8")
mydata <- as.data.frame(mydata)



wkInj.ratio_treat_ctrl <- dataCollapse(mydata$wkInj_ratio_vec, 
                                       mydata$year, mydata$treat_group)
wkInj.ratio_treat_ctrl <- wkInj.ratio_treat_ctrl * 100

## Plot: Occupational insurance

#pdf("f5_wkInj.pdf", family = "Times New Roman", 
#    width = 4.2, height = 4)
paraPlot(wkInj.ratio_treat_ctrl, 
         "Time", "Occupational Accident Insurance Claimant",
          hline = 4, 
          x.axis.vec = seq(2000, 2005, 1), 
          y.lab = TRUE, y.lab.names = "Percentage point",
          y.min = 0, y.max = 2,
          legend = TRUE, leg.pos = "topright")
#dev.off()


## Table

outcome <- c("wkInj_ratio_vec")

cov.names <- c("log_pop2", "rural_pop_pct",
               "log_light", "log_total_tax_rev_pc",
               "log_coal_produ0", "log_mine_indwage",
               "log_traf_indwage",
               "sec_tty", "sec_age", 
               "sec_promotion", "gov_tty", 
               "gov_age", "gov_promotion")

t.var <- c("did", "treat_time", "treat_group")


rhs.1 <- paste('~', paste(t.var, collapse = " + "))
rhs.2 <- paste('~', paste(c(t.var, cov.names), collapse = " + "))
rhs.3 <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
                          "+ factor(year)")
rhs.4 <- paste('~', paste(c(t.var, cov.names), collapse = " + "), 
                          "+ factor(year) + factor(prov_id)")
rhs.list <- list(rhs.1, rhs.2, rhs.3, rhs.4)


estimate <- NULL
SE <- NULL
mod.out <- list()



## Table ##


for(j in 1:length(rhs.list)){
    formula <- as.formula(paste(outcome, rhs.list[[j]]))
    mod <- lm_robust(formula, cluster = prov_id, data = mydata)
    estimate <- c(estimate, coefficients(mod)['did'])
    SE <- c(SE, summary(mod)$coefficients['did', 2])
    mod.out <- append(mod.out, list(mod))
}



coef.df <- data.frame(coef = estimate,
                      se = SE)

##            coef          se
## 1 -0.0006737402 0.002518412
## 2  0.0008816501 0.003121268
## 3  0.0015831710 0.003318922
## 4 -0.0028160635 0.003035193  ## most strigent specification
                                ## controls + year FE + prof FE


modelsummary(mod.out, coef_map = c("did", "treat_time", "treat_group"))
## same information as above, omit in the paper.









